{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "train_data=pd.read_csv('data/train.csv')\n",
    "test_df=pd.read_csv('data/test.csv')\n",
    "train_df=train_data[train_data.loc[:,'Time']<3]\n",
    "val_df=train_data[train_data.loc[:,'Time']==3]\n",
    "\n",
    "del train_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## baseline\n",
    "以默认参数的XGB分数为准，低于此基准线2.554的模型一律不考虑。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def xgb_eval(train_df,val_df):\n",
    "    train_df=train_df.copy()\n",
    "    val_df=val_df.copy()\n",
    "\n",
    "    try:\n",
    "        from sklearn.preprocessing import LabelEncoder\n",
    "        lb_encoder=LabelEncoder()\n",
    "        lb_encoder.fit(train_df.loc[:,'RoomDir'].append(val_df.loc[:,'RoomDir']))\n",
    "        train_df.loc[:,'RoomDir']=lb_encoder.transform(train_df.loc[:,'RoomDir'])\n",
    "        val_df.loc[:,'RoomDir']=lb_encoder.transform(val_df.loc[:,'RoomDir'])\n",
    "    except Exception as e:\n",
    "        print(e)\n",
    "\n",
    "    import xgboost as xgb\n",
    "    X_train=train_df.drop(['Rental'],axis=1)\n",
    "    Y_train=train_df.loc[:,'Rental'].values\n",
    "    X_val=val_df.drop(['Rental'],axis=1)\n",
    "    Y_val=val_df.loc[:,'Rental'].values\n",
    "\n",
    "    from sklearn.metrics import mean_squared_error\n",
    "\n",
    "    try:\n",
    "        eval_df=val_df.copy().drop('Time',axis=1)\n",
    "    except Exception as e:\n",
    "        eval_df=val_df.copy()\n",
    "\n",
    "    reg_model=xgb.XGBRegressor(max_depth=5,n_estimators=500,n_jobs=-1)\n",
    "    reg_model.fit(X_train,Y_train)\n",
    "\n",
    "    y_pred=reg_model.predict(X_val)\n",
    "    print(np.sqrt(mean_squared_error(Y_val,y_pred)),end=' ')\n",
    "\n",
    "    eval_df.loc[:,'Y_pred']=y_pred\n",
    "    eval_df.loc[:,'RE']=eval_df.loc[:,'Y_pred']-eval_df.loc[:,'Rental']\n",
    "    \n",
    "    print('')\n",
    "    feature=X_train.columns\n",
    "    fe_im=reg_model.feature_importances_\n",
    "    print(pd.DataFrame({'fe':feature,'im':fe_im}).sort_values(by='im',ascending=False))\n",
    "\n",
    "    import matplotlib.pyplot as plt\n",
    "    plt.clf()\n",
    "    plt.figure(figsize=(15,4))\n",
    "    plt.plot([Y_train.min(),Y_train.max()],[0,0],color='red')\n",
    "    plt.scatter(x=eval_df.loc[:,'Rental'],y=eval_df.loc[:,'RE'])\n",
    "    plt.show()\n",
    "\n",
    "    return eval_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 默认RMSE为2.55426\n",
    "# eval_df=xgb_eval(train_df,val_df)\n",
    "# eval_df.to_csv('org_eval.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 原生特征的丢弃尝试\n",
    "以XGB做原生特征筛选，在原生特征中丢弃后不影响分数甚至涨分的特征有：Time，**RentRoom**(涨幅明显)，RoomDir，**Livingroom**，**RentType**(涨幅明显)，**SubwayLine**(涨幅明显)，**SubwayDis**(涨幅明显)。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 丢弃各特征后的分数\n",
    "# # ‘Time':2.558,'Neighborhood':2.592,'RentRoom':2.531,'Height':2.57,'TolHeight':2.591,'RoomArea':3\n",
    "# # 'RoomDir':2.548,'RentStatus':2.561,'Bedroom':2.584,'Livingroom':2.548,'Bathroom':2.590,'RentType':2.538\n",
    "# # 'Region':2.583,'BusLoc':2.594,'SubwayLine':2.521,'SubwaySta':2.569,'SubwayDis':2.537,'RemodCond':2.571\n",
    "# for col in train_df.columns:\n",
    "#     if col!='Rental':\n",
    "#         print('drop col:{}'.format(col))\n",
    "#         tmp_train_df=train_df.drop([col],axis=1)\n",
    "#         tmp_val_df=val_df.drop([col],axis=1)\n",
    "#         eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)\n",
    "\n",
    "# # 一起丢弃:2.553\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# tmp_train_df.drop(['Time','RentRoom','RoomDir','Livingroom','RentType','SubwayLine','SubwayDis'],axis=1,inplace=True)\n",
    "# tmp_val_df.drop(['Time','RentRoom','RoomDir','Livingroom','RentType','SubwayLine','SubwayDis'],axis=1,inplace=True)\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征工程\n",
    "这里并未包含我们尝试过的所有特征，有些代码删掉了。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 朝向特征取首次出现的朝向，此种处理方式的可视化结果与[南、东南、东、西南、北、西]的事实相符\n",
    "# # 得分2.548\n",
    "# def ex_dir(direction):\n",
    "#     try:\n",
    "#         direction=direction.split(' ')[0]\n",
    "#     except Exception as e:\n",
    "#         pass\n",
    "#     return direction\n",
    "\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# tmp_train_df.loc[:,'RoomDir']=train_df.loc[:,'RoomDir'].apply(ex_dir)\n",
    "# tmp_val_df.loc[:,'RoomDir']=val_df.loc[:,'RoomDir'].apply(ex_dir)\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)\n",
    "\n",
    "# # 朝向rank-encoding:2.550\n",
    "# rank_df=tmp_train_df.loc[:,['RoomDir','Rental']].groupby('RoomDir',as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "# rank_df.loc[:,'RoomDir'+'_rank']=rank_df.index+1\n",
    "# rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "# tmp_train_df=tmp_train_df.merge(rank_fe_df,how='left',on='RoomDir')\n",
    "# tmp_val_df=tmp_val_df.merge(rank_fe_df,how='left',on='RoomDir')\n",
    "# tmp_train_df.drop(['RoomDir'],axis=1,inplace=True)\n",
    "# tmp_val_df.drop(['RoomDir'],axis=1,inplace=True)\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 总房间数TolRooms:2.506\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# tmp_train_df.loc[:,'TolRooms']=tmp_train_df.loc[:,'Livingroom']+tmp_train_df.loc[:,'Bedroom']+tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_val_df.loc[:,'TolRooms']=tmp_val_df.loc[:,'Livingroom']+tmp_val_df.loc[:,'Bedroom']+tmp_val_df.loc[:,'Bathroom']\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)\n",
    "\n",
    "# # 再加上平均面积:2.515\n",
    "# tmp_train_df.loc[:,'Area/Room']=tmp_train_df.loc[:,'RoomArea']/(tmp_train_df.loc[:,'TolRooms']+1)\n",
    "# tmp_val_df.loc[:,'Area/Room']=tmp_val_df.loc[:,'RoomArea']/(tmp_val_df.loc[:,'TolRooms']+1)\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 领域知识特征：卧室面积占1/3，卫生间面积占1/14，客厅占1/3，单调变换对树模型无用，使用卧室(卫生间)平均面积\n",
    "# 掉分厉害，弃用\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy().\n",
    "\n",
    "# tmp_train_df.loc[:,'Area/Bedroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bedroom']\n",
    "# tmp_val_df.loc[:,'Area/Bedroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bedroom']\n",
    "# tmp_train_df.loc[:,'Area/Bathroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_val_df.loc[:,'Area/Bathroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_train_df.loc[:,'Area/Livingroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_val_df.loc[:,'Area/Livingroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_train_df.loc[:,'OtherArea']=tmp_train_df.loc[:,'RoomArea']-tmp_train_df.loc[:,'Area/Bedroom']-tmp_train_df.loc[:,'Area/Bathroom']-tmp_train_df.loc[:,'Area/Livingroom']\n",
    "# tmp_val_df.loc[:,'OtherArea']=tmp_val_df.loc[:,'RoomArea']-tmp_val_df.loc[:,'Area/Bedroom']-tmp_val_df.loc[:,'Area/Bathroom']-tmp_val_df.loc[:,'Area/Livingroom']\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 楼层出租屋总面积，未尝试\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "\n",
    "# total_area_train=tmp_train_df.groupbypby([''])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 以X室X厅制作一个户型特征:2.546\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# tmp_train_df.loc[:,'RoomStr']=tmp_train_df.loc[:,'Bedroom']+tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_val_df.loc[:,'RoomStr']=tmp_val_df.loc[:,'Bedroom']+tmp_val_df.loc[:,'Livingroom']\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 以相对高度Height除以相对总高度TolHeight得到一个绝对高度ab_Height\n",
    "# # 得分：2.561\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "\n",
    "# tmp_train_df.loc[:,'ab_Height']=tmp_train_df.loc[:,'Height']/(tmp_train_df.loc[:,'TolHeight']+1)\n",
    "# tmp_val_df.loc[:,'ab_Height']=tmp_val_df.loc[:,'Height']/(tmp_val_df.loc[:,'TolHeight']+1)\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 出租房的数目除以楼层总高度，计算该栋楼的出租率\n",
    "# # 得分：2.595\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "\n",
    "# tmp_train_df.loc[:,'Rent_pct']=tmp_train_df.loc[:,'RentRoom']/((tmp_train_df.loc[:,'TolHeight']+1))\n",
    "# tmp_val_df.loc[:,'Rent_pct']=tmp_val_df.loc[:,'RentRoom']/((tmp_val_df.loc[:,'TolHeight']+1))\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 猜想：能不能用数据分析推算出每间出租房所在小区的单元？？？\n",
    "# 小区已知，但小区里面有很多单元，能不能推算出单元？？？\n",
    "# 这是个类别特征，而且类别数应该会很多，比小区数还多\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 地铁特征，房子是否有'近地铁'这个属性:重要性为0\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# tmp_train_df.loc[:,'NearSubway']=(~tmp_train_df.loc[:,'SubwayLine'].isnull()).astype(np.int8).values\n",
    "# tmp_val_df.loc[:,'NearSubway']=(~tmp_val_df.loc[:,'SubwayLine'].isnull()).astype(np.int8).values\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据中有关于地铁的距离特征，这个怎么用？能不能利用这个地理信息进行聚类或rank\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 根据有序特征进行聚类，构造无监督特征\n",
    "# # eps=0.1，不编码原聚类特征\n",
    "# # 得分：2.544\n",
    "# cls_cols=['Height','TolHeight','RoomArea','Bedroom','Livingroom','Bathroom']        # 聚类列\n",
    "# all_cols=cls_cols+['Rental']\n",
    "\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "# need_scale=tmp_train_df.loc[:,cls_cols].append(tmp_val_df.loc[:,cls_cols])\n",
    "\n",
    "# from sklearn.preprocessing import MinMaxScaler\n",
    "# mm_scaler=MinMaxScaler()\n",
    "# need_scale=mm_scaler.fit_transform(need_scale)\n",
    "\n",
    "# from sklearn.cluster import DBSCAN\n",
    "# cls_model=DBSCAN(eps=0.1, min_samples=3,n_jobs=-1).fit(need_scale)\n",
    "\n",
    "# tmp_train_df.loc[:,'clsFe']=cls_model.labels_[:len(tmp_train_df)]\n",
    "# tmp_val_df.loc[:,'clsFe']=cls_model.labels_[len(tmp_train_df):]\n",
    "\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # 统计特征如计数值，区间等等，未尝试\n",
    "# cat_cols=['Neighborhood','Height','TolHeight','RoomDir','']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # rank_encoding(mean_encoding)\n",
    "# # 使用循环对所有列遍历做编码。两种编码方式类似，只是一个是离散特征，一个是连续特征，后者容易过拟合。\n",
    "\n",
    "# # 以平均租金为准添加评级特征，部分有用\n",
    "# # 'Time':2.554,'Height':2.549,'TolHeight'2.545\n",
    "# # 'Bedroom':2.546,'Livingroom':2.550,'Bathroom':2.551,'RentType':2.554\n",
    "# # 'Region':2.491,'BusLoc':2.480,'SubwayLine':2.539,'SubwaySta':'2.518','RemodCond':2.543\n",
    "# rank_cols=['Time','Height','TolHeight','Bedroom','Livingroom','Bathroom',\n",
    "#            'RentType','Region','BusLoc','SubwayLine','SubwaySta','RemodCond']\n",
    "# for col in rank_cols:\n",
    "#     if col!='Rental':\n",
    "#         print(col+'_rank_encoding...')\n",
    "#         tmp_train_df=train_df.copy()\n",
    "#         tmp_val_df=val_df.copy()\n",
    "\n",
    "#         rank_df=train_df.loc[:,[col,'Rental']].groupby(col,as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "#         rank_df.loc[:,col+'_rank']=rank_df.index+1        # +1，为缺失值预留一个0值的rank\n",
    "#         rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "#         tmp_train_df=tmp_train_df.merge(rank_fe_df,how='left',on=col)\n",
    "#         tmp_val_df=tmp_val_df.merge(rank_fe_df,how='left',on=col)\n",
    "#         tmp_train_df.drop([col],axis=1,inplace=True)\n",
    "#         tmp_val_df.drop([col],axis=1,inplace=True)\n",
    "\n",
    "#         eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征选择\n",
    "一股脑加上所有特征表现不佳，使用贪心策略(前向选择、后向选择)逐个添加特征。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# train_data=pd.read_csv('train.csv')\n",
    "# train_df=train_data[train_data.loc[:,'Time']<3]\n",
    "# val_df=train_data[train_data.loc[:,'Time']==3]\n",
    "\n",
    "# drop_cols=['SubwayLine','RentRoom','Time']        # 需要丢弃的原生特征\n",
    "\n",
    "# comb_train_df=train_df.copy()\n",
    "# comb_val_df=val_df.copy()\n",
    "\n",
    "# # 前向特征选择这块我是用for循环暴力搜出来的最优特征组合，最终筛选出来的特征组合为：\n",
    "# # ['ab_Height','TolRooms','Area/Room','BusLoc_rank','SubwayLine_rank']\n",
    "\n",
    "# comb_train_df.loc[:,'ab_Height']=comb_train_df.loc[:,'Height']/(comb_train_df.loc[:,'TolHeight']+1)\n",
    "# comb_val_df.loc[:,'ab_Height']=comb_val_df.loc[:,'Height']/(comb_val_df.loc[:,'TolHeight']+1)\n",
    "\n",
    "# comb_train_df.loc[:,'TolRooms']=comb_train_df.loc[:,'Livingroom']+comb_train_df.loc[:,'Bedroom']+comb_train_df.loc[:,'Bathroom']\n",
    "# comb_val_df.loc[:,'TolRooms']=comb_val_df.loc[:,'Livingroom']+comb_val_df.loc[:,'Bedroom']+comb_val_df.loc[:,'Bathroom']\n",
    "# comb_train_df.loc[:,'Area/Room']=comb_train_df.loc[:,'RoomArea']/(comb_train_df.loc[:,'TolRooms']+1)\n",
    "# comb_val_df.loc[:,'Area/Room']=comb_val_df.loc[:,'RoomArea']/(comb_val_df.loc[:,'TolRooms']+1)\n",
    "\n",
    "# rank_cols=['BusLoc','SubwayLine']\n",
    "# for col in rank_cols:\n",
    "#     rank_df=train_df.loc[:,[col,'Rental']].groupby(col,as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "#     rank_df.loc[:,col+'_rank']=rank_df.index+1        # +1，为缺失值预留一个0值的rank\n",
    "#     rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "#     comb_train_df=comb_train_df.merge(rank_fe_df,how='left',on=col)\n",
    "#     comb_val_df=comb_val_df.merge(rank_fe_df,how='left',on=col)\n",
    "#     try:\n",
    "#         comb_train_df.drop([col],axis=1,inplace=True)\n",
    "#         comb_val_df.drop([col],axis=1,inplace=True)\n",
    "#     except Exception as e:\n",
    "#         print(e)\n",
    "# for drop_col in drop_cols:\n",
    "#         try:\n",
    "#             comb_train_df.drop(drop_col,axis=1,inplace=True)\n",
    "#             comb_val_df.drop(drop_col,axis=1,inplace=True)\n",
    "#         except Exception as e:\n",
    "#             pass\n",
    "\n",
    "# # 贪心策略添加特征，目前为:2.403\n",
    "# eval_df=xgb_eval(train_df=comb_train_df,val_df=comb_val_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\tvalidation_0-rmse:8.78249\n",
      "Will train until validation_0-rmse hasn't improved in 10 rounds.\n",
      "[100]\tvalidation_0-rmse:2.36576\n",
      "[200]\tvalidation_0-rmse:2.23093\n",
      "[300]\tvalidation_0-rmse:2.1509\n",
      "[400]\tvalidation_0-rmse:2.08998\n",
      "[500]\tvalidation_0-rmse:2.05739\n",
      "[600]\tvalidation_0-rmse:2.03298\n",
      "[700]\tvalidation_0-rmse:2.0145\n",
      "[800]\tvalidation_0-rmse:2.00196\n",
      "[900]\tvalidation_0-rmse:1.99024\n",
      "[1000]\tvalidation_0-rmse:1.98088\n",
      "[1100]\tvalidation_0-rmse:1.97277\n",
      "Stopping. Best iteration:\n",
      "[1168]\tvalidation_0-rmse:1.96779\n",
      "\n",
      "1.9677954303629377 \n",
      "                 fe        im\n",
      "3          RoomArea  0.176308\n",
      "0      Neighborhood  0.151993\n",
      "16        Area/Room  0.146165\n",
      "4           RoomDir  0.083905\n",
      "2         TolHeight  0.073936\n",
      "12        SubwayDis  0.068511\n",
      "17      BusLoc_rank  0.055537\n",
      "14        ab_Height  0.051043\n",
      "1            Height  0.039684\n",
      "11        SubwaySta  0.034333\n",
      "10           Region  0.027275\n",
      "6           Bedroom  0.018188\n",
      "15         TolRooms  0.014438\n",
      "7        Livingroom  0.014165\n",
      "5        RentStatus  0.010863\n",
      "8          Bathroom  0.010770\n",
      "18  SubwayLine_rank  0.009441\n",
      "13        RemodCond  0.007573\n",
      "9          RentType  0.005872\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 432x288 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3IAAAD8CAYAAAAyhZbUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3X+QXOV95/vPM62RGGRgEEasNUiWkBW5TOQg76wl1ey9hUmwCNgwl6x/EGnXm+sVdW95N8E4451JVAFuidKkdJc4W7ubWyjJvUmhi2VhVQevdKOVI3N3VyWRDB7JE2x0QfwQNBhkiwECbTQaPfeP7jN095xzus/p0+dXv19VFOpnevo83X265/me5/t8H2OtFQAAAAAgO3qS7gAAAAAAIBgCOQAAAADIGAI5AAAAAMgYAjkAAAAAyBgCOQAAAADIGAI5AAAAAMgYAjkAAAAAyBgCOQAAAADIGAI5AAAAAMiYBUl3oNaHP/xhu3LlyqS7AQAAAACJeOqpp35mrb262f1SFcitXLlSExMTSXcDAAAAABJhjHmplfuRWgkAAAAAGUMgBwAAAAAZQyAHAAAAABlDIAcAAAAAGUMgBwAAAAAZk6qqlUASipMl7Tp0Sq9Ol7Wsv08jm9dqeP1A0t0CAAAAPBHIoasVJ0sa2z+l8sysJKk0XdbY/ilJIpgDAABAahHIdbktu4/p6Olzc7eHVi/Rnm2bEuxRvHYdOjUXxDnKM7PadegUgRwAAABSK7I1csaYgjFm0hjzn6u3VxljnjTGPGeM2WuMWRjVsRCNxiBOko6ePqctu48l1KP4vTpdDtQOAAAApEGUxU5+R9JPam7/oaQ/stZ+TNKbkr4a4bEQgcYgrll7Hi3r7wvUDgAAAKRBJIGcMeZaSbdJ+tPqbSPpJkmPVe/yF5KGozgWEKWRzWvV11uoa+vrLWhk89qEegQAAAA0F9UauW9J+qaky6q3r5I0ba29UL39iiQWHCF1nHVwVK0EAABAlrQdyBljPifpDWvtU8aYG0P8/t2S7pakFStWtNsdBDC0eolrGuXQ6iUJ9CY5w+sHCNwAAACQKVGkVg5Jut0Y86Kkb6uSUvnHkvqNMU6geK2kktsvW2sfttYOWmsHr7766gi6g1bt2bZpXtDWbVUr86Y4WdLQ+BGtGj2gofEjKk66fuwAAACQccZaG92DVWbkftda+zljzD5J37XWftsY839I+pG19j/5/f7g4KCdmJiIrD9AN2ncE0+qrPfbeec6ZhwBAAAywhjzlLV2sNn9oqxa2ejfSrrXGPOcKmvm/qyDxwK6nt+eeAAAAMiXSDcEt9Y+IemJ6r+fl/TpKB8fgDf2xAMAAOgenZyRAxAj9sQDAADoHgRyQE6wJx4AAED3iDS1EkBy2BMPAACgexDIATnCnngAAADdgUAOXW97cUqPPvmyZq1VwRjdtWG5dgyvS7pbAAAAgCcCOXS17cUpPXL8zNztWWvnbhPMAQAAIK0odoKu9uiTLwdqBwAAANKAQA5dbdbaQO0AAABAGpBamQGs4eqcgjGuQVvBmAR6AwAAALSGGbmUc9ZwOcGGs4Zre3Eq4Z7lw10blgdqBwAAANKAQC7lWMPVWTuG12nrxhVzM3AFY7R14wpmPAEAAJBqpFamHGu4Om/H8DoCNwAAAGQKM3Ip57VWizVcAAAAQPcikEs51nABAAAAaERqZco5KX9UrQQAAADgMDZFa60GBwftxMRE0t0AAAAAgEQYY56y1g42ux+plQAAAACQMaRWAjlSnCxp16FTenW6rGX9fRrZvFbD6weS7pakdPcNAAAgawjk0PW2F6dysQaxOFnS2P4plWdmJUml6bLG9lc2jk86YEpz3wAAALKIQA5dbXtxSo8cPzN3e9baudt+wVwag79dh07NBUqO8sysdh06lXiwlOa+AQAAZBFr5NDVHn3y5UDt0gfBn7MpuxP8
bS9OdaSPrXp1uhyoPU5p7lucipMlDY0f0arRAxoaP6LiZCnpLgEAgIwikENXm/Wo2urVLoUL/uKwrL8vUHuc0ty3uDjppaXpsqw+SC8lmAMAAGEQyKGrFYwJ1C6FC/6kzs/GjGxeq77eQl1bX29BI5vXRnqcMNLct7j4pZcCAAAERSCHrnbXhuWB2qVwwV8cszHD6we08851Gujvk5E00N+nnXeuS8UatOH1A/rUiivq2j614opU9C0upJcCAIAoUewEXc0pUBKkcMldG5bXFUipbfcSV7GP4fUDqQyOthendPT0ubq2o6fPaXtxKvEiMXG5dGFB756fdW0HAAAIikAOkUtjRUc/O4bXBepfmOCv22dj/NYVpvnciNJ7LkGcXzsAAIAfAjlEKmw5/6wJGvwt6+9TySVo65ZiH2HXFeaJ1zPtnlcAAABEqe1AzhizXNJfSrpGlTHJw9baPzbGLJG0V9JKSS9K+qK19s12j5cFxcmSdh06pVeny1rW36eRzWtjSXdL6ri1vGZe9hw/ox88czbRviVpZPPaug2xpWwX+7j5oSf07Bvvzt1es3SxDt97o+f9C8a4Bm1+6wrzhtcAAABEKYoZuQuSvmGt/aEx5jJJTxljDkv6l5L+xlo7bowZlTQq6d9GcLxUc4paOAN2p6iFpI4GLkkdt5HXDItT4EPV/9+z94Tu2Xsi9tTLpNI+nfcgSKAdJjCPI5hvDOIk6dk33tXNDz3hGcyFWVeYN3G9Bmm4oAMAADqv7UDOWvuapNeq/37HGPMTSQOS7pB0Y/VufyHpCeUskHMbMMVV1KJRUsdt5DXr4CXq1Eu/maKk0z4nXjqnn771C1lJP33rF5p46Zzne1OcLGlk30nNXKy8lqXpskb2nZTkHZjHFcw3BnHN2qVw6wrzJo7XIC0XdAAAQOcZG+EaFWPMSkn/VdIvSzpjre2vthtJbzq3vQwODtqJiYnI+tNJjQMmqZIq1xhMOYykF8Zv61h/Vo0ecF1r0+njNmoMllpVMEand97a1rHdZoqkD4K51WMHPVPb2j12M16vy9aNK1wH8jc88F80XZ6Z197f16sT933W9RhD40dc1+EN9Pfp6OhNIXrtbuXoAc+fvRjjuRalvMxixXUOAACAzjHGPGWtHWx2v8j2kTPGfEjSdyXdY619u/ZnthItukaMxpi7jTETxpiJs2fPRtWdjvOaAfNa79LpohZejx93MY0dw+u0deOKudehYIwWt1BePYqiF81mipIsuOFXtdGNWxDn1y5RGTOsOPb4iwvnAAAA3SOSqpXGmF5Vgrg91tr91ebXjTEfsda+Zoz5iKQ33H7XWvuwpIelyoxcFP2Jg9fAaNbaeTNzcRS1iKuYxpbdx+r2AxtavUR7tm2qu09jRUe32ctGcRR88Er77DGVmYxOzsbEEURe0tuj8sxF1/YorVm62HPmM4vSkpYchW6vjgoAQDdpe4RXTZv8M0k/sdY+VPOjxyV9pfrvr0j6q3aPlSZeA6OB/j7tvHOdBvr7ZGpud3pAOLx+oOPHbQzipMqmzlt2H2u5b17iKHrheQyr1M3GXHlpb6B2SXr/wvwgzq89rMP33jgvaGtWtTLN8jSLNbJ5rfp662fAs1wdFQAAeItiRm5I0j+XNGWMOVFt+z1J45K+Y4z5qqSXJH0xgmOlht8M2PD6gUSu5Hf6uI1BXLP2WrV961TlyMsXFfT2+/Nn/S5fVBnY7hhepxfO/kNdf3t7pMZJrFZmYzq9puq+z1+vkcdOamb2gxm73oLRfZ+/3vN3LnpM7nm1tyOrQZubPM1ihamOioq8rJMEAHSPKKpW/ndVamq4+dV2Hz+tGDCFF3Qz7Va9e9595slpL06W9MMzb9X9zCUTUZL/bEyYyoADHsGC1yxlmPOLfcrCydsef0ldSMoyqn0CALIokjVy3YoBU3SiuBrebB2a21ooL36zMWHWVIUJFoKeX+zVFg4XZZCndZIAgO5BINdBSW0+3SlD
q5e4plEOrV7S9Hf99neL6mp4sxmpIGue/AKsMGuq4ggW2KstvDxdlCFFMLg8rZMEAHQPArkOSXrz6U7Ys21TS1UrG7nt7/bsG+/q5oee0OF7b4zsanizGSmvtVBu9k2c8Tz2FX29rtsAXNHnXYhEiidY6FTaat7lJfghRTCcPK2TBAB0DwK5NvgFNX77hrU60A4TNHVamOM3298tqqvhzWak3NIbvfgVcPFacsZStGzKU/BDimA4eVsnCQDoDgRyIfmV4t+zbVPb+4Y1e/y0aWdGI8qr4X4zUm7pja3O0NWafs9js26PdkccqbZxpfN+bOyALtScyguM9NzO2yI/ThwXM/IU/JAiGA7rJAEAWUQgF1KzUvztVhBsp9R/3Nqd0YjzanhjeuPK0QOBHyNM4BlHqm1c6byNQZwkXbCV9iiDubguZuQp+CFFMLw8rZOMSxqzRtqRt3XtAPKv7Q3B4c6rUmBUFQSLkyUNjR/RqtEDGho/EnoD6y27j2nl6IG5/5pt7u3Gb0ZD0rzNox1OexybmXvxKtTiV8AlyKbLzvvktnZP8k7BDcPrGF7tYTUGcc3aw4rrYoZXkJPF4IcNwREXvwstWeRcCHMuwDoXwrYXpxLuGQB4Y0auQzpZQdBrBmzfxBkdf/7Nlo/XyoyHW7XJDdddVfe8vNJFnRmNw/fe6Fu1UkruaviebZu04cHDev2d83Nt11y20Peqslca1r6JM7pn74m5+61ZulivvPkL3zV5rabaonNGNq/VvXtPqHZLwR75Vy5NK1IEEZcsZY20Iop17QAQNwK5EPyuONbO5HSqgqDXDFjtH9BW0uqa/SH2qjZZ2+YXiFhJQ+NHNLJ5bV3Q5iaqFJ2gqTHbi1N1QZwkvf7OeW0vTvn+XmPg6RYUexV5QbpMvHROjfvCX6y2ZzEAIkUQCK7dde0AkARSKwNyG7A7olwfMOCR1jXQ3xdo7U47qXtRBCLObKFf6mdUKTphUmP8rsIGkdWr0GEs8Fjm6dUeVpi01yC2F6e0euxgLGmvjqhSopM+BpA3XuvXW13XDgBJIJALyG/AHuUib7+1LkHW7sR5NdEr+KxdL+fGb2Zw9dhBrRw9oNVjB7W9ODU3+K5tc4RZI5b0VdgsDhK+vGFFoPaw9mzbNC9oa+ViSSuBTGPQ7ybqc6A4WdLX955Qabosq8pFjq/vPRFpoFWcLGnksZN1xxh57CTBHCLX6zF68GpPu06vaweATiC1MqVq17qUpssqGKPyzKwe+N7Ter+FfdAcfmHC0OolrkFUmBmPgjE6OnqTVo0ekNvwt3YWsTH90U/j7Frjz4JWZWzcJsFIrv2NK8CatVZD40dc1zOldZNqv4A56lTioBdHWq2g2spsW9TnwO/uOznvXLPV9qje1we+97RmZuuPMjNr9cD3nk7FuYP82PWFG+rWBNe2Z1En17UDQKcQyEXIGZBf0dcrYyr7irUzAHd+p3Zg+qbLXmX9fb0qz8zq/QuNK32kSxcW5rU59mzb5Ls2bc3SxS2nV1539aWSmpc/dyuR365WF6O7DfJ7jOTWhY3XXen6+05g1X9pr6yV3ipX3mOv12rN0sV67/xF3/3qnJ/VBh2SAm/p0GOkiy7PpadJPBJXwBjHcVrdE66V887tHGjHBbc3x6c9DLfvB792IKw8Ftbp1Lp2AOgUArmAvGaxeswHA/Lp8geDplYG4H4DXLeBaaPFixborbL7QO298/6/Wzvj4QR1zt5qzsxcK8Hc82ffkyStvMo9kFt5VSWQ68S6I2dWqxm319JrDP3iz+ufQ2MQWDswLk2X1ddbmBfM1QbFre5XV5uGGnSTaq/YxC9mcVLxnFmc0nRZ9+w9oXv2ntBAhAMzt+OMPHZSkv9eg0GDv1b3hPOrtup4+tV3fH+eJ2md/UW6UVgHAJJFIBeQ2yzWogU9rrNhDr8BuF8qmHO7GWfw1c5GwF4FR4ZWL6mrOOkVkDiD4uPPv+n6c6e9U2vPWn2dWtV4
32YBdXlmVu+dv6gXx9vfENuvn34/W+hxHi5c4L1oxS0VzxF0Y3c/YVL+wmw03+rn4K4Ny5vurzftcXEkzS7t7dF7My4z8z4Ll8K8zgAAIHkZXZacrD3bNunF8dvmZqz8gjhHabo8r/DC9uKU7tl7wnXm5f7Hn64L6PxYSZcudH8rnZmwZlrdE8grTc+oklrqVzxk9djBlvrSKUGKxDQ+i1aCwCCBop9l/X3qv7TX9Wde7ZL3eeh3fjZLuWtWqKZVYVL+mm0076bVDbF3DK/T1o0rYi02E0e1zzDnQJjXGeHkraJo3p4P0suv0BnQzZiRC8FvCwI/Ttra1/ee0MearD8LOhvg9VjHnvfvp5NS5ae2GIdn+p6az4pFMRt3+aKC3j1/MdRjfebjVzedham1euzg3KL3S3p7VHaZ6ah1RZ93kOW1fq2RE3Tc//jTrj+vfdpBisa0ozFA9VsPGCWv88nvPAuybsdZD3Pd2IFQawuDuuDx/nu1h+ExuerZLrWejor25G3mM2y6NBCU29r6oIXOgLwikAsobBBXyyq+zaL9gofGgYWX2mIcUSsYo4ULTNMgyfH2+61X7Gz0g2fOBrp/bcXM8oxVjzRv4+hafrHUogXugWBvj7T08r55QcfXXarBSZpbCxm2aEyY87dxJtPr3PU7p/v7el0vTvT7BL9hNVu307gezOszEmENklRb0CO5ffx8MnIlSR8bO1AXhC4w0nM7208tzqtWC/FkBRVSERe//V4J5NDtCOQCyuLGz7UzN2lzeuetkQTHrWg7EDXSwBXua7Ak/zRBr0B15qJ0dPSmee3N1ruFKRoT5nV2S0sMY5FH/mBtexzngdusiJcs7vEXhtc1FL9rK41BnFSZWfzY2AHfYK5xFrmbyrvnbeaTCqmIS9L7vQJpRiDXBYKkEybBq0BK2ly0laCr1QqUrXLbR67ZWqegf8CKk6XAQZJTtXLfxBnX/aKCeP2d877tcQXzrVSBdTBI8BYmTbSV9Ci36pkTL53LRfAXduYT6HZeVYa75WIb4Ic/IUjUytEDmRkwt7JmauXogbn/tuw+1tLjlqbLc2sMx/ZPtVQwIOgfsFYL59Q6OnqT9k2ciSXAimumO8nZj60bVwRqzxu/9Cjpg9nS2s/DvXtP6JHjZ+rSnB85fiaThQ7CzHymmVdadCfSpdHd7tqwPFA70E0I5AK6fJH3BttIryhm0S5atbRfnePo6XMtB3OOVqsFBv0D1uoslMOpVp/FVGI/QSqXRq2xUmbBGG3duCLx2SWn+m6r7WE1S49y3efR47E6sR8lVfGCuf/269XbcHWrt8fo/tuvT6hHyKu0fncCaUAgF0BxstRWsQ1kX9B1dkdPnws8IGx2jKHxIxr86JLIB9q1op4l8KpoGXWly2bctifodnu2bZp3LtVuZh8Vr1lkpz3IbGnUs/hO2mceZv7iMrx+QEsW18++LVncS6ETdMSO4XU6vfNWvTh+m07vvJUgDqhijVwA7KuEMKJeo+iU+b7gV1M+ZZZetsi1quXSyxZJqgQOccz+uW1P0IlqrG46UUI7qrWFUQdtbrw2YXdml4O8F1GvjYmjKt41ly10XSt6zWULI3n8uN380BPzns/r75zXzQ89ocP33phMpwCgyzAj16LiZCm2AR/QzMysnbdpeZo123DebVaoU4bXD+jo6E1aetnCWD/TzdaIBRVlgZgtu48FWt8ZZr1fs/Qot9lSrz9QUa+NiaMq3s/+wb2ao1d72oXZhiTtSK8FkDXMyLXAWYQPoHMaZ4Wirg5aa8ODhz0raXZK1MFClEFc42M56zu9ZuqefP7ngdodzibsbtxmS1de1RfLTG0cVfEooZ5ubDoNIIsI5FoQpGQ50M3ctlJIo7iDuDRrNlvaGOgNrV7SsdmYxs3crxtzD+b/7yfPRDq4bpb2GYW0l1B3e5/jSLlNCzadBpBFHQ/kjDG3SPpjSQVJf2qtHe/0MaNGSiXQGuez4qzjk9RyMNe4
h1hYn7zvr+uKEl2+qKAfPXBL6MfLK+f19uM1WxeXix6TVV7tYQ1+dIlrIDf40ejSfeMIFsMKMyvbI/eqop1YrxHHRvJ+M6YrRw9keg9DtMZtH8u0XowEHB0N5IwxBUn/UdLNkl6R9HfGmMettT/u5HEBtK/d1MaZWat79p7QN75z0vd+Q+NH5l0sCXvxpDGIk6S335/VytED+taXbtDw+gFtePBwy49X+xoMrV6iN955v27Wac3SxXr9rV/MCxz/t+F18wYEYfgNLNopEOOkizfLNIg6aItjQB6G14b39+w9kcmBXNABabNZWTcfWlRwreL8oYi36OlEyqPb69NMVKmWQT8DYYOLvAUlnX4+xclS3fdAabo8dzvq1y2t34Nh3fzQE/P+LlLwKD6dLnbyaUnPWWuft9ael/RtSXd0+JgAUqTZGqAoZ7z9tgcZ2z+lT97316HTKo+ePjcvdfDZN951DRzv2Xti3kbvQbltkH3P3hNzBUmk8Hu9tZIu3hvyr8PQ+BGtGj2gofEjdZvb57HEf5AiMV7Va6Ouaut23oztn6p7L6Lg9VmLeoueqF83r9enVe3sYRj0MxD2vXSCksbvjqjPgbjEcU77XcyJUt6+BxuDOKnyd/Hmh55IpkNdyNgOLrQ2xvwzSbdYa/9V9fY/l7TBWvuv3e4/ODhoJyYmOtafsFaOHtAffP9hfeKN55PuCgBJG6+7yrX9eJNiG2nl9nx+eGZa5y/4D4ov7+vVJz5y+dxtv+dfe4y4XqceY3Td1Yv14Q8t0vHnz0mutVaNNl7nHpC2+nza5XecxtfY8ePX3tbb5fkVJ73uH9dz8TpvFi4o6FMr+l1/J0zf0vDehDlOK5+rZsI+v6CfgTDvZeU48bw3cQn7OgQR3/kc/HswzXJxrt1wg/StbyXdi3mMMU9Zaweb3S/x7QeMMXcbYyaMMRNnz55NujsAkAqtDDbdAolWLFzQPP1t4YKCLu/rbXo/Pxet1Zlzzoyr10XDdFdtfLs8ox+/9rZru9f9k+R13vidT17vc7vvfxq1G8RJ7RSnCfYZCPNe5lG+Xodsfg8ivTo9I7dJ0v3W2s3V22OSZK3d6Xb/tM7INeboA0jWi+O3ubZ3csuCTnJ7Pm5rB5v9rt/zr71fszVyfb0F7bxznYbXD7hWMwyyds5IemH8Nq0eO+hZtfH0zltdf7fV59OuVs6bxuMF7VsanovfcYJWrUz78/HS6ufKS+3eh0EF/Qx49XWgv09HR2/yPE5c701cwr4OQcT1moX5HkyzvJ1radLqjFynq1b+naQ1xphVkkqSvizpNzt8zMg5X9oEc0B29fUW1NsT/RqeVo8ddAuTlVf1daxibuOebVf09coYafq9mXmFBNwG80ECZqcC6cbrrnQNADded2WYp4CIxbXVQNCAcc3Sxa7bWqxZujjU8Uc2r513EaOVz2cURSmCVi716mvY4klZlafXIc3Va8MoGGnWZT6okI5dVbpCR1MrrbUXJP1rSYck/UTSd6y1T3fymJ2yY3gdVxeADNt55zr96IFbdM1lC0M/xiUt/HW6fFFB3/rSDRro75NR5arxzjuDD/6OP/9m0/uELXYiVYK5o6M36YXx23Tivs9q8g8+qxfGb9PR0Zsiq9JWO9h68efuQalXezuKkyXPoitR8Xrt23lPsiTM8/fb5sDL4XtvnBe0tVMVb3j9gHbeuS7Q5/PF8dt0euetbVcW3DG8Tls3rpjbO7BgjO8Mn1dfs1x9Mow4XoetG1cEag8r6DmQdv/uizcEakf0Or6PnLX2oKSDnT5OXL71pRs08thJzbhdggCgSwpGv6j5fHhdUY+S2wCxkfNH/8nfv7muvdXZpdqZA7/fcfasaxxkBK1+1qzaZ1o3bB7o73MtEf6qx+yiV3tYjWmjtVUJww783IKTPds2xbKJdlrLyO/Ztsm17Ljf8w+zzYGkyEuZN248L0VfndDLjuF1gQbtbn1txsirnEZ2hXkdgnDekzi2
BQh6DqRZY3ZHmr6jukXHA7m8qT1pS9NlFYzRrLVaWDA6T3CHLufs1daok2vXWgniagXZeDzJWXjnu8WtPc1rKbzWrCzrd08VbWfzdzduWyuUZ2a169CpUIMLv+Cs04F0mKB0wON1Hoj4dS5OlvTKm7+oa3vlzV+oOFliEJewLRtXuKbvbYl4dilv8hRgxanTQTb8EciFEMVJ67ZxMZAmXoGEn3ZnPsJoJYhzZlTcBsZx2eoxuPJK3cnbWoow61y8Cqv4pe9FPfMXVbDW19uj8sxF13YvYYLSuNYTRR0wIzpxzi4BSFbi2w90qx89cIsuX1RfArxxbc2iBbw9SE6YWR9nINdJQYsc9BjpC4OVYKmVjbBbEWZ90OBHl6jQU5/cVOgxGvyo++/kbS1FmHUue7ZtmveaNktf9Jrhi3rmL6idd35y3h/cnmq7l1aC0sZNyfdNnIllXVWYgDnN6wrT3Lcwdgyv0+mdt0a2vg9AOjEjlyBnLU2jxj+4Gx48rNffOT93+5rLFupjSz9Ud6W6cV0S0MzihQW9e94/qLnmsoV1514rStNlDY0f6Uie/DWXLQy8Xuai1dwsQVTrscKsj9p16JRmL9Z/RmcvWt8ZjLSm+vT39WraZb+0/ib7joXJZgg6I5bWCndh1pI0S0f1Kh4ieae4RiVMqmxc6wrDSHPfAMALgVwGNBZn8NK48Bzw8+D/tK7pAv+f/UO4zY1L02XdW33sKIO51985P28NTiv7mjkBnNfgM4ygA7y4in3E4f7br9fIvpOaqQlMe3uM7r/9+siPFbTYR5oX3wcNZJsFpWGLh0QhbMCc5sAozX0DADcEcjlSO1PROPi5MDsbeGYF+bZvovm+iEHXyNW6KGls/48iH0Dfs/dE3WO6XUlv5MwSuA0+4xJHsY8eU5mBdGuPUlzBUnGyVBcwlqbLGtl3sq4PXv1LQ+DWrrQHpVI6+wYA3YJALqfcBjJupaK/9pk1c3+I+3p7VL5wUdZW1uNsvO5KHXv+nOvAsFt5pZTFyatoRlBxXLV3K+7QCc6V9MZiJlL9LIHb4DOugief+fjVru/bZz5+dWTH8PqsduIzHEewdP/jT9fN+knSzEWr+x9/OpMBQ5itBNIclKa5bwDQDQjkuohSFZ8ZAAAb1klEQVTX2iK/P8RuA4+49tsJIqrgppkT9322o6X0pebVIp11U41rJ7PKq1x6GK3MEjQOPjv9fjp+8MzZQO1pF8d6Iq+LJklfTAmjU/vbBa3qCQDIDwI5+HK74urc3l6cmlfe+Mnnfx77Oj2nkl8cgVwzzgCqcYAbZPbrYovpjGE3tg6i3SCrlZS+o6M3adXoAdcNbMNI6yxBntbIeRXZ2LL7GOuMPHSiXD8FOqKV1s3XAcALgRxC86qoF6ToykD1j+W+iTMtBztJbdLsBGlGcg06jLwXywcJsuJK92slwFx5VXt9+c0NrW1AG2XmX1oHY3FtiB2HuIpsLFrQo/cvzE/PjXprljhmtsIG8s3OZ4K2aHRixjRJbhda01gBF0B7COQQucYUzsaCBVKlwt2uL/zK3B/Ixj+UnUp3W2Cka67oC7U+yplx8Ao6oghGnPVcUaevNg5Unav2zV7n48+/GfqYQfY8iyq9sjhZ0shjJzUzW1Mc47HmxTHikNay+Gl23iWI82sPa8+2Ta5riKMMksIE8mkPLvIULORpg/Ptxam6DJVZa+duZ/X9AeCOQA4dF1d1s8sXFfT2+97VCC8pGD3zYP0m136BjNtV+k4UCBno75v3uvgFcmFmnMIOSMNWrRzo7ws0YIiqmuQD33t6LohzzMxaPfC95ItjUOUvuE5eNKlVnCzpxZ+/V9f24s/fm7fVRTvCBPJpDi7yFizkKfX50Sdf9mzP4nsTl7RmcwB+COQQi6DrlsJsOHz7+gHXdXJBZoZqRRm0eW3YfknBBN641+sKfSc0K7ziJswsU1TVJN98z70Ihle75L0x+uKFhcDHb6aVz0EWBhNxpTzG
JY4LAGEC+U6lY0Yhb8FCnlKfvb6z29lOJu/SPvsNeMnmX13k3v23X6/ehkoZzTYc9htYxOGayxZ6/uyZB2/VJYX65+M2Q9gKryv0nXDXhuWB7j/Q36edd64L9YdveP2Ajo7epBfGbwsc3LbDLYjza+8kZzBRmi7L6oPBRHGyFHtf/MSV8hiXMBcAwmg8x5t9TryCiFbSMTt9DuUtWBjZvFZ9vfUXb7Ka+lww7lWmvNrhP/sNpBkzckilMFevwwwselTZuLrVdsc1ly2sK/1/zWUL51WRbBQkaPMqqOKlU8VRnCvrtetg/F5PrwBszdLFrgVw1ixdPPfvxlmEMMLM5KZFcbKkb3zn5LzXNy2pdLXyNHuRZmlOx/T6LshqsJCn1OeN113pmlGy8borE+hNNuQptRbdhUAOqRU0HTPMwOKhL93guh7toS/d4FlJM47y3lti2hfPr8jI0PgRjWxeO686qVdV0tqgrNHhe290LSbhFMZxS2vx4pe+d//t17sW1vGbyU0D5/l7BclpG0zEVbglrn3S/CrRJinOdMyg7tqw3PU7KugsfpqkdeuSoF78uft77dUOLk4huwjkkBthBhZ+A6Xh9QOJ7dHkNhPm9fza4VdkxGuNQLOgzIvfz91mEdwYSX/4G5/0/HlWr6o3e/5pG0wMrx/QzoM/ruvz5ZcUIn+d49onLa6iKmGqPAYNLuIakHp9R2VxfVzeMLsUHFWFkVUEcsiNsAMLv4FSkns0ue3TF3UgVxv4uA3+vFKymgVtbvwGsa0OMBa0sMN4Fq+q+z3/NA4mbn7oibrUYkl6/Z3zuvmhJ0KdG37i+Ax6zUwPRBj8xFXlMc4BqddeokgWs0vBZfUiIEAgh1yJa2CR1P5JfqlmYatsOoHPqtEDrjMQUVzFbTaIbbVK5cxFm7r1YlHwev4FY0IXj+kkt9Rav/Yw4pwN99r4fuVV0Q1846ryGOeANAsVVrsRs0vhZPEiIEDVSiAgJyhx1jM5Qcn2Yue2AXDs2bZJly+qr6x2+aJCJAPcMBXyWtWsomiQAXMe04O8Kub9uy/+SlcOLBqDOKmyHcjK0QMd+Zx5bXzv1R5GnFUeg1bHDKM4WdLIvpN11TFH9p1MXYXVbjS8fkA771yngf4+GbVXTRhAujEjBwSU5P5JW3Yfm7fp+dvvz2rL7mNtV5Hr5FXcZoPYIAPmPKYHRTWLEkeKYBz8Zpc7kY6Yt1L6cbj/8afrigpJlRnz+x+Pbu89hMfsEtAdCOSAgJIc9HkNcI+ePqetHpUuW60i18mUrGZBZquvXZ7Tg6IYeI1sXqtv7Dup2ZoBdqHH5O41i/qiSVqrVqaZ2zYffu1ZQKoogKwhkAMCSuv+SVFUkevUVdxm+xp5vaZGlRm4Tg6s4ipxH4eJl87VBXGSNHvRauKlc7kakEZ90eTShQXXDeAvXVhwuXc4eZktzSu3LVDcqvYCQJoQyAEBpXn/pLRWkWu2r5HXa7pl44qOP58vDK5wDeS+MLiio8fthCTTfqPUrHhP1BdN3nMJ4vzaw8hbAYorL+3Vm+/Nn3278tLeBHrTvrg2UgeAKFHsBAhox/A6bd24Ym4wWTBGW2MIOCTvWaK4Z4+27D6mlaMH5v7bsvuY7/2b7WuU5Gs6tv9HgdrTLGzab3GypKHxI1o1ekBD40cSL1ixZ9sm33M66osmnSz048hbAYr7Pn+9egv1AXVvwei+z1+fUI/aw95rALKIGTkghKRmvuLaINmPV0XBLbuPefbjir5e17UzV/R9cPU+qde0PHMxUHuahUn7TWtK2Z5tm+ZtW9GqoJ+RuGbL8lSAIm/7bhV6jC5cdPnstLB3JQAkhUAOyJgkNymX/AuuePGKIxJeVpg7YdJ+w6SUxbXeK0yqaJgLDXkLSuKSp8DULYjzaweANGgrkDPG7JL0eUnnJZ2W9FvW2unqz8YkfVXSrKTfttYearOvADJq2mUtjV97nHqM5DZW65YL8WFSyuLYQFsKlyra
7EKD12xdnoISqi8CQHdod43cYUm/bK39pKT/T9KYJBljPiHpy5Kul3SLpP9kjImu/BeATIljDVJYv7nBvaiJV3uaeaUh+qUnhnlv4thAW/JOCQ1b7MRvti4vnFTZ2o26x/ZPdWTdY9rWViJ+eToHthentHrsoFaOHtDqsYPaXpxKuktAU20Fctba/2KtvVC9eVzStdV/3yHp29ba9621L0h6TtKn2zkWgHQIU3BlZPNa9fXWX8tJS8W+JAutpEGY9yauvRS9UkLDFjsJkxacNX6pslGKM2CMwzWXLQzUjnydA856XOc7bNZaPXL8DMEcUi/KqpX/s6T/p/rvAUm1ixteqbYByDi3ioLNikmkvWLfjuF1Or3zVr04fptO77y1a4I4qfLe/MY/HqgLZH/jH6cjzTBMkB22smteZhbiqr4YV8AYlwUF96Qhr3bk6xzwW48LpFnTNXLGmO9L+kcuP/p9a+1fVe/z+5IuSNoTtAPGmLsl3S1JK1ZkL5UJ6EZhCq7kaQ1SnhQnS/ruU6W6K9HffaqkwY8uScX7FbSaqV9l15WjBzx/L42VO8NY5lGIJuo05ryV68/b84lDnl6zuLIMgKg1DeSstb/m93NjzL+U9DlJv2rt3BlfklSb+3Jttc3t8R+W9LAkDQ4O8okBgJAuKRj9Ynb+1+glBe81ZWGqVobZ5iBOXhcavDYaX7SgJzebQce1lUJcAWNc8vZ84pCn1yzt32mAl7ZSK40xt0j6pqTbrbXv1fzocUlfNsYsMsaskrRG0t+2cywAgL9nHrx1XtB2ScHomQdv9fydMFfVo167FhevtOD3L7jvGeg2SE27uNKY07zuNYy8PZ845Ok1y+p3GtDuPnL/QdIiSYdN5arFcWvt/2KtfdoY8x1JP1Yl5fJr1tpZn8cBAETAL2hzE+aqupPq+OiTL2vWWhWM0V0blmdibaHbbJ1fymUWxZHGnPa994JuwZD255NGeXrNsvydhu5mbIryfwcHB+3ExETS3QCAzNpenAo0GHEqzzWm4qWpGE2n+QVyL47fFmNPEAXOaQBZZ4x5ylo72Ox+7c7IAUDmeRXHyBqnhLbDKaEtyTOYy9NVdUAKt+4TALKIQA5AV/PbJDprwZxfCW2/WTkqiiJP8lRNEQD8RLmPHABkTp42iaaEdjgDHusBvdqRbl7rO7NYTREA/BDIAUBOeJXKpoS2vzxV30u77cUprR47qJWjB7R67KC2F6ciPwbvJ4BuQSAHADlBCe1whtcP6NorL6lru/bKS0g3jZizhrN28/lHjp+JPJiLawsGAEgaa+QAICcooR3Olt3H9Owb79a1PfvGu5lcJ5lmYddwhsG6TwDdgEAOQFcb8NhHLavro3YMryNwCyhP6yTjFHSrC9ZwAkC0SK0E0NU+8/GrA7UDCJcmyRpOAIgWgRyArvaDZ84GagfgnybphTWcSLPiZElD40e0avSAhsaPqDhZSrpLQFMEcgC6GntOYWj1kkDtCJcmuWN4nbZuXDE3A1cwRls3riAVGIkrTpY0tn9KpemyrKTSdFlj+6cI5pB6rJED0NWWeayRy+qeU8XJknYdOqVXp8ta1t+nkc1rKfrQxJ5tm+ZtDD+0eknkhU7y9N4UjHEN2pqlSbKGE2m069AplWdm69rKM7PadehUZj+j6A4EcgC62sjmtRrbP1X3Rzyre045V5Wd5+JcVZbEYKSJTlenzNt7c9eG5Xrk+BnXdiBryMxAVpFaCaCr5WnPKb+rykhW3t4b0iSRJ14ZGFnNzED3YEYOQNfLy55TXFUOr9Npj3l8b0iTRF7kKTPDkadUbngjkAOAnMjber+4xJH2yHsDpJfzOc9L4JO3VG54I7USAHJiZPNa9fUW6tqyflU5DnGkPfLeAOk2vH5AR0dv0gvjt+no6E2ZDnjylsoNb8zIAUBO5O2qclziSHvkvQEQlzymcsMdgRwA5Ehe1vvFKa60R94bAHEglbt7kFoJAOhqI5vXqrenfv+z
3h5D2iMit2X3Ma0cPTD335bdx5LuEnKIVO7uQSAHAEDjPtb++1oDgTVuOi9JR0+fI5hD5PK0rQ78kVoJAOhquw6d0sysrWubmbXadegUAx9EpjGIa9YOtINU7u7AjBwAoKtRGAAAkEUEcgCAruZVAIDCAACANCOQAwB0NQoDIA5Dq5cEageAZgjkAABdjcIAiMOebZvmBW1Dq5doz7ZNCfUIQNYZa23ze8VkcHDQTkxMJN0NAADQZYqTJTZsB1Lq5oee0LNvvDt3e83SxTp8743JdajDjDFPWWsHm92PGTkAANDVipMlje2fUmm6LCupNF3W2P4pFSdLSXcN6HqNQZwkPfvGu7r5oSeS6VCKRBLIGWO+YYyxxpgPV28bY8y/N8Y8Z4z5kTHmU1EcBwAAIGq7Dp1SeWa2rq08M6tdh04l1KP2bC9OafXYQa0cPaDVYwe1vTiVdJeA0BqDuGbt3aTtQM4Ys1zSZyWdqWn+dUlrqv/dLelP2j0OAABAJ+RpC4rtxSk9cvyMZqtLZ2at1SPHzxDMATkUxYzcH0n6pqTaxXZ3SPpLW3FcUr8x5iMRHAsAACBSedqC4tEnXw7UjoriZElD40e0avSAhsaPkFaLTGgrkDPG3CGpZK092fCjAUm13xivVNsAAABSJU9bUMx6FLHzagdrJNNuzdLFgdq7SdNAzhjzfWPM37v8d4ek35P0B+10wBhztzFmwhgzcfbs2XYeCgAAILA8bUFRMCZQO/K3RjJvDt9747ygLe9VK1u1oNkdrLW/5tZujFknaZWkk6by5XCtpB8aYz4tqSRpec3dr622uT3+w5IelirbDwTpPAAAQBSG1w9kMnBrdNeG5Xrk+BnXdrjL0xrJvCJocxc6tdJaO2WtXWqtXWmtXalK+uSnrLU/lfS4pH9RrV65UdJb1trXoukyAAAA3OwYXqetG1fMzcAVjNHWjSu0Y3hdwj1LrzytkUR3aTojF9JBSbdKek7Se5J+q0PHAQAAQI0dw+sI3AIY2bxWY/un6tIrs7pGEt0lskCuOivn/NtK+lpUjw0AAAB0gpNSu+vQKb06Xday/j6NbF6bi1Rb5FunZuQAAAAy4+aHnqjbYJhiCt0lL2sk0V2i2EcOAAAgsxqDOEl69o13dfNDTyTTIQBoAYEcAADoao1BXLN2AEgDUisBAADQ1bbsPqajp8/N3R5avUR7tm1KsEdAcwRyAICuxyAO6F6Nn39JOnr6nLbsPsb3AFKN1EoAQFfzG8ShO6xZujhQO/Kl8fPfrL0dxcmShsaPaNXoAQ2NH1FxshT5MdA9COQAAF0tzkEc0unwvTfOC9qoWomoFSdLGts/pdJ0WVZSabqssf1TBHMIjdRKAADQ9Qja0Gm7Dp2q23Rcksozs9p16BRbHyAUZuQAAADQtYZWLwnUHtar0+VA7UAzBHIAgK4W1yAOQDrt2bZp3ue9EwWPlvX3BWoHmiG1EgDQ1fZs20TVSqDLxfF5H9m8VmP7p+rSK/t6CxrZvLbjx0Y+EcgBALoeQRuATnPWwe06dEqvTpe1rL9PI5vXsj4OoRHIAQAAADEYXj9A4IbIsEYOAAAAADKGQA4AAAAAMobUSgAA0PWKkyXWLgHIFAI5AADQ1YqTpbpqgqXpssb2T0kSwRyA1CK1EgAAdLVdh07VlYSXpPLMrHYdOpVQjwCgOQI5AADQ1V6dLgdqB4A0IJADAABdbVl/X6B2AEgDAjkAANDVRjavVaHH1LUVeoxGNq9NqEcA0ByBHAAA6GoTL53T7EVb1zZ70WripXMJ9QgAmiOQAwAAXe3RJ18O1A4AaUAgBwAAutqstYHaASANCOQAAEBXKxgTqB0A0oBADgAAdLW7NiwP1A4AabAg6Q4AAAAkacfwOkmVNXGz1qpgjO7asHyuHQDSyNg287+NMf9G0tckzUo6YK39ZrV9TNJXq+2/ba091OyxBgcH7cTERFv9AQAAAIIoTpa069ApvTpd1rL+Po1sXqvh
9QNJdwtdyhjzlLV2sNn92pqRM8Z8RtIdkn7FWvu+MWZptf0Tkr4s6XpJyyR93xjzS9ba2XaOBwAAAESpOFnS2P4plWcqw9TSdFlj+6ckiWAOqdbuGrn/VdK4tfZ9SbLWvlFtv0PSt62171trX5D0nKRPt3ksAAAAIFK7Dp2aC+Ic5ZlZ7Tp0KqEeAa1pN5D7JUn/gzHmSWPM/2uM+SfV9gFJtZuvvFJtAwAAAFLj1elyoHYgLZqmVhpjvi/pH7n86Perv79E0kZJ/0TSd4wx1wXpgDHmbkl3S9KKFSuC/CoAAADQlmX9fSq5BG3L+vsS6A3QuqYzctbaX7PW/rLLf3+lykzbflvxt5IuSvqwpJKk2pq911bb3B7/YWvtoLV28Oqrr27/GQEAAAAtGtm8Vn29hbq2vt6CRjavTahHQGvaTa0sSvqMJBljfknSQkk/k/S4pC8bYxYZY1ZJWiPpb9s8FgAAABCp4fUD2nnnOg3098lIGujv084711HoBKnX7j5yfy7pz40xfy/pvKSv2Mp+Bk8bY74j6ceSLkj6GhUrAQAAkEbD6wdyFbhtL06xL2IXaCuQs9ael7TV42cPSnqwnccHAAAA0LrtxSk9cvzM3O1Za+duE8zlS7uplQAAAABS4tEnXw7UjuwikAMAAAByYtbaQO3ILgI5AAAAICcKxgRqR3YRyAEAAAA5cdeG5YHakV3tVq0EAAAAkBJOQROqVuafsSnKlx0cHLQTExNJdwMAAAAAEmGMecpaO9jsfszIAQAA5EhxsqRdh07p1emylvX3aWTz2lztkQaggkAOAAAgJ4qTJY3tn1J5ZlaSVJoua2z/lCQRzAE5Q7ETAACAnNh16NRcEOcoz8xq16FTCfUIQKcQyAEAAOTEq9PlQO0AsotADgAAICeW9fcFageQXQRyAAAAOTGyea36egt1bX29BY1sXptQjwB0CsVOAAAAcsIpaELVSiD/COQAAAByZHj9AIEb0AVIrQQAAACAjCGQAwAAAICMIZADAAAAgIwhkAMAAACAjCGQAwAAAICMMdbapPswxxhzVtJLSffDx4cl/SzpTiC3OL/QSZxf6BTOLXQS5xc6Ka3n10ettVc3u1OqArm0M8ZMWGsHk+4H8onzC53E+YVO4dxCJ3F+oZOyfn6RWgkAAAAAGUMgBwAAAAAZQyAXzMNJdwC5xvmFTuL8QqdwbqGTOL/QSZk+v1gjBwAAAAAZw4wcAAAAAGQMgVwLjDG3GGNOGWOeM8aMJt0fZJsxZrkx5gfGmB8bY542xvxOtX2JMeawMebZ6v+vTLqvyC5jTMEYM2mM+c/V26uMMU9Wv8f2GmMWJt1HZJMxpt8Y85gx5hljzE+MMZv4/kJUjDFfr/5t/HtjzKPGmEv4/kJYxpg/N8a8YYz5+5o21+8rU/Hvq+fZj4wxn0qu560hkGvCGFOQ9B8l/bqkT0i6yxjziWR7hYy7IOkb1tpPSNoo6WvVc2pU0t9Ya9dI+pvqbSCs35H0k5rbfyjpj6y1H5P0pqSvJtIr5MEfS/pra+3HJf2KKucZ319omzFmQNJvSxq01v6ypIKkL4vvL4T3f0m6paHN6/vq1yWtqf53t6Q/iamPoRHINfdpSc9Za5+31p6X9G1JdyTcJ2SYtfY1a+0Pq/9+R5VB0IAq59VfVO/2F5KGk+khss4Yc62k2yT9afW2kXSTpMeqd+H8QijGmCsk/Y+S/kySrLXnrbXT4vsL0Vkgqc8Ys0DSpZJeE99fCMla+18lnWto9vq+ukPSX9qK45L6jTEfiaen4RDINTcg6eWa269U24C2GWNWSlov6UlJ11hrX6v+6KeSrkmoW8i+b0n6pqSL1dtXSZq21l6o3uZ7DGGtknRW0v9ZTd39U2PMYvH9hQhYa0uS/ndJZ1QJ4N6S9JT4/kK0vL6vMjfmJ5ADEmKM+ZCk70q6x1r7du3PbKWcLCVlEZgx5nOS3rDWPpV0X5BLCyR9StKfWGvXS3pXDWmU
fH8hrOpapTtUuWCwTNJizU+LAyKT9e8rArnmSpKW19y+ttoGhGaM6VUliNtjrd1fbX7dmcKv/v+NpPqHTBuSdLsx5kVVUsFvUmVNU381VUniewzhvSLpFWvtk9Xbj6kS2PH9hSj8mqQXrLVnrbUzkvar8p3G9xei5PV9lbkxP4Fcc38naU21YtJCVRbdPp5wn5Bh1fVKfybpJ9bah2p+9Likr1T//RVJfxV335B91toxa+211tqVqnxfHbHWbpH0A0n/rHo3zi+EYq39qaSXjTFrq02/KunH4vsL0TgjaaMx5tLq30rn/OL7C1Hy+r56XNK/qFav3CjprZoUzFRiQ/AWGGNuVWXNSUHSn1trH0y4S8gwY8w/lfTfJE3pgzVMv6fKOrnvSFoh6SVJX7TWNi7QBVpmjLlR0u9aaz9njLlOlRm6JZImJW211r6fZP+QTcaYG1QppLNQ0vOSfkuVC8N8f6FtxpgHJH1JlQrPk5L+lSrrlPj+QmDGmEcl3Sjpw5Jel3SfpKJcvq+qFw/+gyrpvO9J+i1r7UQS/W4VgRwAAAAAZAyplQAAAACQMQRyAAAAAJAxBHIAAAAAkDEEcgAAAACQMQRyAAAAAJAxBHIAAAAAkDEEcgAAAACQMQRyAAAAAJAx/z8RTnvFgnRwCgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1080x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def xgb_eval(train_df,val_df):\n",
    "    \"\"\"Fit an XGBoost regressor on train_df and evaluate it on val_df.\n",
    "\n",
    "    Prints the validation RMSE and the feature importances, draws a scatter\n",
    "    plot of the residuals (prediction minus actual) against the true Rental,\n",
    "    and returns the processed validation frame with the predictions attached.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_df, val_df : pandas.DataFrame\n",
    "        Frames that both contain the target column 'Rental'. Neither input\n",
    "        is modified; the function works on copies.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Copy of the (label-encoded) val_df without 'Time' if it was present,\n",
    "        plus 'Y_pred' (prediction) and 'RE' (Y_pred - Rental).\n",
    "    \"\"\"\n",
    "    from sklearn.metrics import mean_squared_error\n",
    "    from sklearn.preprocessing import LabelEncoder\n",
    "    import matplotlib.pyplot as plt\n",
    "    import xgboost as xgb\n",
    "\n",
    "    train_df=train_df.copy()\n",
    "    val_df=val_df.copy()\n",
    "\n",
    "    # Encode RoomDir over the union of both frames so that a category that\n",
    "    # only occurs in the validation split still receives a code.\n",
    "    if 'RoomDir' in train_df.columns and 'RoomDir' in val_df.columns:\n",
    "        lb_encoder=LabelEncoder()\n",
    "        # pd.concat instead of Series.append: append was removed in pandas 2.0.\n",
    "        lb_encoder.fit(pd.concat([train_df['RoomDir'],val_df['RoomDir']]))\n",
    "        # Plain column assignment replaces the column, so the dtype becomes numeric.\n",
    "        train_df['RoomDir']=lb_encoder.transform(train_df['RoomDir'])\n",
    "        val_df['RoomDir']=lb_encoder.transform(val_df['RoomDir'])\n",
    "    else:\n",
    "        print('RoomDir column not found; skipping label encoding')\n",
    "\n",
    "    X_train=train_df.drop(['Rental'],axis=1)\n",
    "    Y_train=train_df['Rental'].values\n",
    "    X_val=val_df.drop(['Rental'],axis=1)\n",
    "    Y_val=val_df['Rental'].values\n",
    "\n",
    "    # 'Time' may already have been removed by the caller, hence errors='ignore'.\n",
    "    eval_df=val_df.drop('Time',axis=1,errors='ignore')\n",
    "\n",
    "    # n_estimators is only an upper bound; early stopping decides the real tree count.\n",
    "    reg_model=xgb.XGBRegressor(max_depth=8,n_estimators=99999,learning_rate=0.1,\n",
    "                               reg_alpha=0.5,reg_lambda=0.5,n_jobs=-1)\n",
    "    # Early stopping monitors val_df, so the RMSE printed below is measured on\n",
    "    # the same rows that were used to choose the number of trees.\n",
    "    reg_model.fit(X_train,Y_train,eval_set=[(X_val,Y_val)],verbose=100,early_stopping_rounds=10)\n",
    "\n",
    "    y_pred=reg_model.predict(X_val)\n",
    "    print(np.sqrt(mean_squared_error(Y_val,y_pred)),end=' ')\n",
    "\n",
    "    eval_df['Y_pred']=y_pred\n",
    "    eval_df['RE']=eval_df['Y_pred']-eval_df['Rental']\n",
    "\n",
    "    print('')\n",
    "    importance_df=pd.DataFrame({'fe':X_train.columns,'im':reg_model.feature_importances_})\n",
    "    print(importance_df.sort_values(by='im',ascending=False))\n",
    "\n",
    "    # One explicit figure: plt.clf() followed by plt.figure() left an empty figure behind.\n",
    "    fig,ax=plt.subplots(figsize=(15,4))\n",
    "    ax.plot([Y_train.min(),Y_train.max()],[0,0],color='red')   # zero-residual reference line\n",
    "    ax.scatter(x=eval_df['Rental'],y=eval_df['RE'])\n",
    "    ax.set_xlabel('Rental')\n",
    "    ax.set_ylabel('Y_pred - Rental')\n",
    "    plt.show()\n",
    "\n",
    "    return eval_df\n",
    "\n",
    "# Tuning log\n",
    "# dep8\n",
    "#     est1000:1.985,\n",
    "#     est1880:1.943,1.94\n",
    "#     learning_rate=0.05,est2643:1.9446\n",
    "#     learning_rate=0.03,est4237:1.975\n",
    "#     reg_alpha0.5,reg_lambda0.5,est1597:1.945\n",
    "#         learning_rate=0.05,est2245:1.947\n",
    "#     min_child_weight2,est1070:1.97\n",
    "\n",
    "\n",
    "# Rows with Time<3 are used for training, rows with Time==3 are held out for validation.\n",
    "train_data=pd.read_csv('train.csv')\n",
    "train_df=train_data[train_data.loc[:,'Time']<3]\n",
    "val_df=train_data[train_data.loc[:,'Time']==3]\n",
    "\n",
    "drop_cols=['SubwayLine','RentRoom','Time']\n",
    "\n",
    "comb_train_df=train_df.copy()\n",
    "comb_val_df=val_df.copy()\n",
    "\n",
    "# Derived features, computed identically on both splits.\n",
    "# The +1 in each denominator avoids division by zero.\n",
    "for frame in (comb_train_df,comb_val_df):\n",
    "    frame['ab_Height']=frame['Height']/(frame['TolHeight']+1)\n",
    "    frame['TolRooms']=frame['Livingroom']+frame['Bedroom']+frame['Bathroom']\n",
    "    frame['Area/Room']=frame['RoomArea']/(frame['TolRooms']+1)\n",
    "\n",
    "# Replace each categorical column by the rank of its mean Rental,\n",
    "# computed on the training split only.\n",
    "rank_cols=['BusLoc','SubwayLine']\n",
    "for col in rank_cols:\n",
    "    rank_df=train_df.loc[:,[col,'Rental']].groupby(col,as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "    # Ranks start at 1 so that 0 stays free for missing values.\n",
    "    # NOTE(review): nothing fills that 0 - rows without a match keep NaN after the left merge.\n",
    "    rank_df.loc[:,col+'_rank']=rank_df.index+1\n",
    "    rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "    comb_train_df=comb_train_df.merge(rank_fe_df,how='left',on=col).drop(col,axis=1)\n",
    "    comb_val_df=comb_val_df.merge(rank_fe_df,how='left',on=col).drop(col,axis=1)\n",
    "\n",
    "# errors='ignore': 'SubwayLine' is already gone after the rank loop above.\n",
    "comb_train_df=comb_train_df.drop(drop_cols,axis=1,errors='ignore')\n",
    "comb_val_df=comb_val_df.drop(drop_cols,axis=1,errors='ignore')\n",
    "\n",
    "eval_df=xgb_eval(train_df=comb_train_df,val_df=comb_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# # 后向特征选择，虽然后向特征选择在未调参时比前向选择要好，但是调参之后的分数不如前向选择\n",
    "# # 所以这一部分弃用了，实际上可以将这两个模型结合起来的，这里是一个上分点，我懒得尝试了\n",
    "\n",
    "# def xgb_eval(train_df,val_df):\n",
    "#     train_df=train_df.copy()\n",
    "#     val_df=val_df.copy()\n",
    "\n",
    "#     try:\n",
    "#         from sklearn.preprocessing import LabelEncoder\n",
    "#         lb_encoder=LabelEncoder()\n",
    "#         lb_encoder.fit(train_df.loc[:,'RoomDir'].append(val_df.loc[:,'RoomDir']))\n",
    "#         train_df.loc[:,'RoomDir']=lb_encoder.transform(train_df.loc[:,'RoomDir'])\n",
    "#         val_df.loc[:,'RoomDir']=lb_encoder.transform(val_df.loc[:,'RoomDir'])\n",
    "#     except Exception as e:\n",
    "#         print(e)\n",
    "\n",
    "#     import xgboost as xgb\n",
    "#     X_train=train_df.drop(['Rental'],axis=1)\n",
    "#     Y_train=train_df.loc[:,'Rental'].values\n",
    "#     X_val=val_df.drop(['Rental'],axis=1)\n",
    "#     Y_val=val_df.loc[:,'Rental'].values\n",
    "\n",
    "#     from sklearn.metrics import mean_squared_error\n",
    "\n",
    "#     try:\n",
    "#         eval_df=val_df.copy().drop('Time',axis=1)\n",
    "#     except Exception as e:\n",
    "#         eval_df=val_df.copy()\n",
    "\n",
    "#     reg_model=xgb.XGBRegressor(max_depth=7,n_estimators=9999,n_jobs=-1)\n",
    "#     reg_model.fit(X_train,Y_train,eval_set=[(X_val,Y_val)],verbose=100,early_stopping_rounds=10)\n",
    "\n",
    "#     y_pred=reg_model.predict(X_val)\n",
    "#     print(np.sqrt(mean_squared_error(Y_val,y_pred)),end=' ')\n",
    "\n",
    "#     eval_df.loc[:,'Y_pred']=y_pred\n",
    "#     eval_df.loc[:,'RE']=eval_df.loc[:,'Y_pred']-eval_df.loc[:,'Rental']\n",
    "    \n",
    "#     print('')\n",
    "#     feature=X_train.columns\n",
    "#     fe_im=reg_model.feature_importances_\n",
    "#     print(pd.DataFrame({'fe':feature,'im':fe_im}).sort_values(by='im',ascending=False))\n",
    "\n",
    "#     import matplotlib.pyplot as plt\n",
    "#     plt.clf()\n",
    "#     plt.figure(figsize=(15,4))\n",
    "#     plt.plot([Y_train.min(),Y_train.max()],[0,0],color='red')\n",
    "#     plt.scatter(x=eval_df.loc[:,'Rental'],y=eval_df.loc[:,'RE'])\n",
    "#     plt.show()\n",
    "\n",
    "#     return eval_df\n",
    "\n",
    "# # dep5,est500:2.3769\n",
    "# # dep6,est500:2.2755\n",
    "# # dep7,est1768:2.0344\n",
    "# # dep8,est1036:1.999\n",
    "# # dep9,est680:2.0466\n",
    "\n",
    "# train_data=pd.read_csv('train.csv')\n",
    "# test_data=pd.read_csv('test.csv')\n",
    "# train_df=train_data[train_data.loc[:,'Time']<3]\n",
    "# val_df=train_data[train_data.loc[:,'Time']==3]\n",
    "\n",
    "# tmp_train_df=train_df.copy()\n",
    "# tmp_val_df=val_df.copy()\n",
    "\n",
    "# def ex_dir(direction):\n",
    "#     try:\n",
    "#         direction=direction.split(' ')[0]\n",
    "#     except Exception as e:\n",
    "#         pass\n",
    "#     return direction\n",
    "# tmp_train_df.loc[:,'RoomDir']=train_df.loc[:,'RoomDir'].apply(ex_dir)\n",
    "# tmp_val_df.loc[:,'RoomDir']=val_df.loc[:,'RoomDir'].apply(ex_dir)\n",
    "# rank_df=tmp_train_df.loc[:,['RoomDir','Rental']].groupby('RoomDir',as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "# rank_df.loc[:,'RoomDir'+'_rank']=rank_df.index+1\n",
    "# rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "# tmp_train_df=tmp_train_df.merge(rank_fe_df,how='left',on='RoomDir')\n",
    "# tmp_val_df=tmp_val_df.merge(rank_fe_df,how='left',on='RoomDir')\n",
    "# tmp_train_df.drop(['RoomDir'],axis=1,inplace=True)\n",
    "# tmp_val_df.drop(['RoomDir'],axis=1,inplace=True)\n",
    "\n",
    "# tmp_train_df.loc[:,'TolRooms']=tmp_train_df.loc[:,'Livingroom']+tmp_train_df.loc[:,'Bedroom']+tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_val_df.loc[:,'TolRooms']=tmp_val_df.loc[:,'Livingroom']+tmp_val_df.loc[:,'Bedroom']+tmp_val_df.loc[:,'Bathroom']\n",
    "# tmp_train_df.loc[:,'Area/Room']=tmp_train_df.loc[:,'RoomArea']/(tmp_train_df.loc[:,'TolRooms']+1)\n",
    "# tmp_val_df.loc[:,'Area/Room']=tmp_val_df.loc[:,'RoomArea']/(tmp_val_df.loc[:,'TolRooms']+1)\n",
    "\n",
    "# tmp_train_df.loc[:,'Area/Bedroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bedroom']\n",
    "# tmp_val_df.loc[:,'Area/Bedroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bedroom']\n",
    "# tmp_train_df.loc[:,'Area/Bathroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_val_df.loc[:,'Area/Bathroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Bathroom']\n",
    "# tmp_train_df.loc[:,'Area/Livingroom']=tmp_train_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_val_df.loc[:,'Area/Livingroom']=tmp_val_df.loc[:,'RoomArea']/tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_train_df.loc[:,'OtherArea']=tmp_train_df.loc[:,'RoomArea']-tmp_train_df.loc[:,'Area/Bedroom']-tmp_train_df.loc[:,'Area/Bathroom']-tmp_train_df.loc[:,'Area/Livingroom']\n",
    "# tmp_val_df.loc[:,'OtherArea']=tmp_val_df.loc[:,'RoomArea']-tmp_val_df.loc[:,'Area/Bedroom']-tmp_val_df.loc[:,'Area/Bathroom']-tmp_val_df.loc[:,'Area/Livingroom']\n",
    "\n",
    "# tmp_df=tmp_train_df.loc[:,['Height','TolHeight','RoomArea']].groupby(['TolHeight','Height'],as_index=False).sum()\n",
    "# tmp_df.rename(columns={'RoomArea':'AreaSumByHeight'},inplace=True)\n",
    "# tmp_train_df=tmp_train_df.merge(tmp_df,how='left',on=['TolHeight','Height'])\n",
    "# tmp_val_df=tmp_val_df.merge(tmp_df,how='left',on=['TolHeight','Height'])\n",
    "# tmp_df=tmp_train_df.loc[:,['Height','TolHeight','RoomArea']].groupby(['TolHeight','Height'],as_index=False).mean()\n",
    "# tmp_df.rename(columns={'RoomArea':'AreaMeanByHeight'},inplace=True)\n",
    "# tmp_train_df=tmp_train_df.merge(tmp_df,how='left',on=['TolHeight','Height'])\n",
    "# tmp_val_df=tmp_val_df.merge(tmp_df,how='left',on=['TolHeight','Height'])\n",
    "\n",
    "# tmp_train_df.loc[:,'RoomStr']=tmp_train_df.loc[:,'Bedroom']+tmp_train_df.loc[:,'Livingroom']\n",
    "# tmp_val_df.loc[:,'RoomStr']=tmp_val_df.loc[:,'Bedroom']+tmp_val_df.loc[:,'Livingroom']\n",
    "\n",
    "# tmp_train_df.loc[:,'ab_Height']=tmp_train_df.loc[:,'Height']/(tmp_train_df.loc[:,'TolHeight']+1)\n",
    "# tmp_val_df.loc[:,'ab_Height']=tmp_val_df.loc[:,'Height']/(tmp_val_df.loc[:,'TolHeight']+1)\n",
    "\n",
    "# tmp_train_df.loc[:,'Rent_pct']=tmp_train_df.loc[:,'RentRoom']/((tmp_train_df.loc[:,'TolHeight']+1))\n",
    "# tmp_val_df.loc[:,'Rent_pct']=tmp_val_df.loc[:,'RentRoom']/((tmp_val_df.loc[:,'TolHeight']+1))\n",
    "\n",
    "# rank_cols=['Height','TolHeight','Bedroom','Livingroom','Bathroom',\n",
    "#            'RentType','Region','BusLoc','SubwayLine','SubwaySta','RemodCond']\n",
    "# for col in rank_cols:\n",
    "#     if col!='Rental':\n",
    "#         rank_df=train_df.loc[:,[col,'Rental']].groupby(col,as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "#         rank_df.loc[:,col+'_rank']=rank_df.index+1        # +1，为缺失值预留一个0值的rank\n",
    "#         rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "#         tmp_train_df=tmp_train_df.merge(rank_fe_df,how='left',on=col)\n",
    "#         tmp_val_df=tmp_val_df.merge(rank_fe_df,how='left',on=col)\n",
    "\n",
    "# drop_cols=['Time','Area/Bedroom','OtherArea','TolHeight_rank','Height',\n",
    "#            'Area/Livingroom','Area/Bathroom','RentRoom','RemodCond_rank','Livingroom',\n",
    "#            'RentType_rank']\n",
    "# tmp_train_df.drop(drop_cols,axis=1,inplace=True)\n",
    "# tmp_val_df.drop(drop_cols,axis=1,inplace=True)\n",
    "\n",
    "# # 贪心策略减特征，目前为：2.377\n",
    "# eval_df=xgb_eval(train_df=tmp_train_df,val_df=tmp_val_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 预测提交"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Neighborhood', 'Height', 'TolHeight', 'RoomArea', 'RoomDir',\n",
      "       'RentStatus', 'Bedroom', 'Livingroom', 'Bathroom', 'RentType', 'Region',\n",
      "       'SubwaySta', 'SubwayDis', 'RemodCond', 'Rental', 'ab_Height',\n",
      "       'TolRooms', 'Area/Room', 'BusLoc_rank', 'SubwayLine_rank'],\n",
      "      dtype='object') Index(['id', 'Neighborhood', 'Height', 'TolHeight', 'RoomArea', 'RoomDir',\n",
      "       'RentStatus', 'Bedroom', 'Livingroom', 'Bathroom', 'RentType', 'Region',\n",
      "       'SubwaySta', 'SubwayDis', 'RemodCond', 'ab_Height', 'TolRooms',\n",
      "       'Area/Room', 'BusLoc_rank', 'SubwayLine_rank'],\n",
      "      dtype='object')\n",
      "[0]\tvalidation_0-rmse:8.88846\n",
      "Will train until validation_0-rmse hasn't improved in 10 rounds.\n",
      "[100]\tvalidation_0-rmse:1.82338\n",
      "[200]\tvalidation_0-rmse:1.56472\n",
      "[300]\tvalidation_0-rmse:1.39574\n",
      "[400]\tvalidation_0-rmse:1.26954\n",
      "[500]\tvalidation_0-rmse:1.18678\n",
      "[600]\tvalidation_0-rmse:1.11666\n",
      "[700]\tvalidation_0-rmse:1.06078\n",
      "[800]\tvalidation_0-rmse:1.01602\n",
      "[900]\tvalidation_0-rmse:0.967317\n",
      "[1000]\tvalidation_0-rmse:0.934924\n",
      "[1100]\tvalidation_0-rmse:0.905556\n",
      "[1200]\tvalidation_0-rmse:0.877854\n",
      "[1300]\tvalidation_0-rmse:0.85011\n",
      "[1400]\tvalidation_0-rmse:0.831434\n",
      "[1500]\tvalidation_0-rmse:0.813272\n",
      "[1600]\tvalidation_0-rmse:0.792019\n",
      "[1700]\tvalidation_0-rmse:0.770273\n",
      "[1800]\tvalidation_0-rmse:0.75367\n",
      "[1879]\tvalidation_0-rmse:0.745837\n"
     ]
    }
   ],
   "source": [
    "def xgb_pred(drop_cols=('SubwayLine','RentRoom','Time')):\n",
    "    \"\"\"Train the final XGBoost model on all of train.csv and predict test.csv.\n",
    "\n",
    "    Applies the same feature engineering as the validation experiment\n",
    "    (label-encoded RoomDir, ab_Height, TolRooms, Area/Room, mean-Rental ranks\n",
    "    for BusLoc and SubwayLine) and writes the predictions to './result/xgb.csv'\n",
    "    with the columns 'id' and 'price'.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    drop_cols : iterable of str, optional\n",
    "        Columns removed from both frames before training; columns that are\n",
    "        not present are skipped.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "    \"\"\"\n",
    "    import os\n",
    "    from sklearn.preprocessing import LabelEncoder\n",
    "    import xgboost as xgb\n",
    "\n",
    "    train_df=pd.read_csv('train.csv')\n",
    "    test_df=pd.read_csv('test.csv')\n",
    "\n",
    "    # Create the output directory up front so a missing folder cannot make\n",
    "    # to_csv fail after the long training run.\n",
    "    out_path='./result/xgb.csv'\n",
    "    os.makedirs(os.path.dirname(out_path),exist_ok=True)\n",
    "\n",
    "    # Encode RoomDir over train and test together so every category gets a code.\n",
    "    if 'RoomDir' in train_df.columns and 'RoomDir' in test_df.columns:\n",
    "        lb_encoder=LabelEncoder()\n",
    "        # pd.concat instead of Series.append: append was removed in pandas 2.0.\n",
    "        lb_encoder.fit(pd.concat([train_df['RoomDir'],test_df['RoomDir']]))\n",
    "        train_df['RoomDir']=lb_encoder.transform(train_df['RoomDir'])\n",
    "        test_df['RoomDir']=lb_encoder.transform(test_df['RoomDir'])\n",
    "    else:\n",
    "        print('RoomDir column not found; skipping label encoding')\n",
    "\n",
    "    # Derived features; the +1 in each denominator avoids division by zero.\n",
    "    for frame in (train_df,test_df):\n",
    "        frame['ab_Height']=frame['Height']/(frame['TolHeight']+1)\n",
    "        frame['TolRooms']=frame['Livingroom']+frame['Bedroom']+frame['Bathroom']\n",
    "        frame['Area/Room']=frame['RoomArea']/(frame['TolRooms']+1)\n",
    "\n",
    "    # Replace each categorical column by the rank of its mean Rental in the training data.\n",
    "    rank_cols=['BusLoc','SubwayLine']\n",
    "    for col in rank_cols:\n",
    "        rank_df=train_df.loc[:,[col,'Rental']].groupby(col,as_index=False).mean().sort_values(by='Rental').reset_index(drop=True)\n",
    "        # Ranks start at 1 so that 0 stays free for missing values.\n",
    "        # NOTE(review): nothing fills that 0 - rows without a match keep NaN after the left merge.\n",
    "        rank_df.loc[:,col+'_rank']=rank_df.index+1\n",
    "        rank_fe_df=rank_df.drop(['Rental'],axis=1)\n",
    "        train_df=train_df.merge(rank_fe_df,how='left',on=col).drop(col,axis=1)\n",
    "        test_df=test_df.merge(rank_fe_df,how='left',on=col).drop(col,axis=1)\n",
    "\n",
    "    # errors='ignore': a rank column's source may already be gone, and a\n",
    "    # column missing from one frame must not block the drop on the other.\n",
    "    train_df=train_df.drop(list(drop_cols),axis=1,errors='ignore')\n",
    "    test_df=test_df.drop(list(drop_cols),axis=1,errors='ignore')\n",
    "\n",
    "    print(train_df.columns,test_df.columns)\n",
    "\n",
    "    X_train=train_df.drop(['Rental'],axis=1)\n",
    "    Y_train=train_df['Rental'].values\n",
    "    test_id=test_df['id']\n",
    "    # Select by the training column list so the test features are in the same\n",
    "    # order as the training features ('id' is left out as a side effect).\n",
    "    X_test=test_df[X_train.columns]\n",
    "\n",
    "    reg_model=xgb.XGBRegressor(max_depth=8,n_estimators=1880,n_jobs=-1)\n",
    "    # eval_set on the training data only feeds the progress log. Early stopping\n",
    "    # was removed: monitoring the training set itself says nothing about\n",
    "    # generalisation, and the tree count is already fixed by n_estimators.\n",
    "    reg_model.fit(X_train,Y_train,eval_set=[(X_train,Y_train)],verbose=100)\n",
    "\n",
    "    y_pred=reg_model.predict(X_test)\n",
    "\n",
    "    sub_df=pd.DataFrame({\n",
    "        'id':test_id,\n",
    "        'price':y_pred\n",
    "    })\n",
    "    sub_df.to_csv(out_path,index=False)\n",
    "\n",
    "    return None\n",
    "\n",
    "xgb_pred()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "最后我的XGB单模分数为1.94，线下线上是一致的，总特征数二十多；队友的LGB单模分数是1.86，总特征三十多。\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
